from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Scrape the text of a paginated document: open each page in Chrome, wait for
# the element with id "contents", and save its text to one file per page.

# Pages are numbered from 1; LAST_PAGE is inclusive.
LAST_PAGE = 48
# Maximum number of seconds to wait for the content element on each page.
WAIT_SECONDS = 20

# Start a Chrome browser session through Selenium WebDriver.
driver = webdriver.Chrome()

# Path segment identifying the target document; it is inserted into every URL.
doc_id = 'url路径'

try:
    # The wait object only stores the driver and the timeout, so a single
    # instance is reused for every page instead of being rebuilt per iteration.
    wait = WebDriverWait(driver, WAIT_SECONDS)
    content_locator = (By.XPATH, '//*[@id="contents"]')

    for page_num in range(1, LAST_PAGE + 1):
        # The first page has no numeric suffix; later pages append "-<page>".
        page_suffix = '' if page_num == 1 else f'-{page_num}'
        url = f'https://url地址/{doc_id}{page_suffix}.html'

        driver.get(url)

        # Block until the content element is present in the DOM. No exception
        # is caught here, so a failed wait stops the run; the finally clause
        # below still closes the browser.
        element = wait.until(EC.presence_of_element_located(content_locator))
        text = element.text

        # Write each page to its own UTF-8 file, overwriting any earlier run.
        filename = f'h:\\Japan_{page_num}.txt'
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(text)

finally:
    # Always shut the browser down, even when a page fails to load.
    driver.quit()